df_models <- read.csv('data/output/linear_models_summary.csv')
pool_intercept <- df_models[df_models$Model == "Complete pooling",'Intercept'][1]
pool_slope <- df_models[df_models$Model == "Complete pooling",'Slope_Bin'][1]
df_models %>%
filter(Model %in% c("Complete pooling")) %>%
ggplot(aes(Bin, rater_score)) +
geom_point(alpha = 0.7, position = jit_pos) +
geom_abline(aes(intercept = Intercept, slope = Slope_Bin), color = color_pals[1], size = .75) +
theme_tidybayes() +
scale_y_continuous(labels = scales::percent) +
scale_x_continuous(labels = c("0-20", "21-40", "41-60", "61-80"))+#limits = c(0, 4)) +
labs(title = "Aggregate Regression - Complete Pooling", subtitle = "Elbow Dataset - Linear Learning",
y = "Predicted Bin Score") -> pool_plot
df_models %>%
filter(Model %in% c("No pooling")) %>%
ggplot(aes(Bin, rater_score)) +
geom_abline(aes(intercept = Intercept, slope = Slope_Bin), color = color_pals[2], size = .5, alpha = 0.5) +
geom_point(alpha = 0.7, position = jit_pos) +
geom_abline(aes(intercept = pool_intercept, slope = pool_slope), color = color_pals[1], size = 0.6)+
theme_tidybayes() +
scale_y_continuous(labels = scales::percent) +
scale_x_continuous(labels = c("0-20", "21-40", "41-60", "61-80"))+ #limits = c(0, 4)) +
labs(title = "Individual Regressions - No pooling",
y = "Predicted Bin Score",
subtitle = "Elbow Dataset - Linear Learning") -> no_pool_plot
# Figure 1, 800*370
cowplot::plot_grid(pool_plot, no_pool_plot)
df_models %>%
filter(Model %in% c("Random Intercept")) %>%
ggplot(aes(Bin, rater_score)) +
geom_abline(aes(intercept = Intercept, slope = Slope_Bin), color = color_pals[3], size = .65, alpha = 0.5) +
geom_point(alpha = 0.7, position = jit_pos) +
geom_abline(aes(intercept = pool_intercept, slope = pool_slope), color = color_pals[1], size = 0.6)+
theme_tidybayes() +
scale_y_continuous(labels = scales::percent) +
scale_x_continuous(labels = c("0-20", "21-40", "41-60", "61-80"))+ #limits = c(0, 4)) +
labs(title = "Random Intercepts",
y = "Predicted Bin Score",
subtitle = "Elbow Dataset - Linear Learning") -> ri_plot
cowplot::plot_grid(no_pool_plot, ri_plot)
df_models %>%
filter(Model %in% c("Random Coefs")) %>%
ggplot(aes(Bin, rater_score)) +
geom_abline(aes(intercept = Intercept, slope = Slope_Bin), color = color_pals[4], size = .65, alpha = 0.5) +
geom_point(alpha = 0.7, position = jit_pos) +
geom_abline(aes(intercept = pool_intercept, slope = pool_slope), color = color_pals[1], size = 0.7)+
theme_tidybayes() +
scale_y_continuous(labels = scales::percent) +
scale_x_continuous(labels = c("0-20", "21-40", "41-60", "61-80"))+ #limits = c(0, 4)) +
labs(title = "Random Intercept and Slopes",
y = "Predicted Bin Score",
subtitle = "Elbow Dataset - Linear Learning") -> rc_plot
cowplot::plot_grid(ri_plot, rc_plot)
elbowsBin <- readRDS("data/interim/train_elbows_bins.rds")
# Sort by score
sort_raters <- elbowsBin %>%
group_by(RaterID) %>%
summarise(total_score = mean(rater_score)) %>%
arrange(desc(total_score)) %>%
pull(RaterID)
# List of raters of interest for final table
selected_raters <- sort_raters[seq(1, 226, 27)][c(1:4,6:9)]
rater_subset <- df_models %>%
filter(RaterID %in% selected_raters) %>%
mutate(ID = factor(RaterID, levels = selected_raters, labels = paste0("ID ", selected_raters)))
ggplot(rater_subset) +
aes(x = Bin, y = rater_score) +
# Set the color mapping in this layer so the points don't get a color
geom_abline(aes(intercept = Intercept, slope = Slope_Bin,
color = Model),
size = .75) +
geom_point() +
facet_wrap(~ ID, nrow = 2) +
# labs(x = xlab, y = ylab) +
scale_y_continuous(labels = scales::percent) +
scale_x_continuous(limits = c(0,4)) +
theme_tidybayes() +
# Fix the color palette
scale_color_brewer(palette = "Set1") +
theme(legend.position = "none") +
labs(y = "Predicted Bin Score")
df_models <- read.csv('data/output/logistic_models_summary.csv')
df_models %>%
ggplot(aes(Sequence, Accuracy, group = RaterID)) +
geom_smooth(method = "glm", method.args = list(family = binomial), se = F,
size = 0.5, alpha = 0.5, color = color_pals[2]) +
geom_smooth(method = "glm", method.args = list(family = binomial), se = F,
group = 1, size = 0.5, alpha = 0.5, color = color_pals[1]) +
# geom_point(alpha = 0.7, position = jit_pos) +
# geom_curve(aes(intercept = Intercept, slope = Slope_Seq), color = color_pals[1], size = .75) +
tidybayes::theme_tidybayes() +
scale_y_continuous(labels = scales::percent) +
# scale_x_continuous(labels = c("0-20", "21-40", "41-60", "61-80"))+#limits = c(0, 4)) +
labs(title = "Complete and No Pooling",
subtitle = "Elbow Dataset - Nonlinear Learning",
y = "Probability of Correct Response") -> p1
df_models %>%
filter(Model == "Random Intercept") %>%
ggplot(aes(Sequence, Accuracy, group = RaterID)) +
geom_line(aes(y = prob), color = green) +
geom_smooth(method = "glm", method.args = list(family = binomial), se = F,
group = 1, size = 0.5, alpha = 0.5, color = red) +
# geom_point(alpha = 0.7, position = jit_pos) +
# geom_curve(aes(intercept = Intercept, slope = Slope_Seq), color = color_pals[1], size = .75) +
tidybayes::theme_tidybayes() +
scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
# scale_x_continuous(labels = c("0-20", "21-40", "41-60", "61-80"))+#limits = c(0, 4)) +
labs(title = "Random Intercept",
subtitle = "Elbow Dataset - Nonlinear Learning",
y = "Probability of Correct Response") -> p2
df_models %>%
filter(Model == "Random Coefs") %>%
ggplot(aes(Sequence, Accuracy, group = RaterID)) +
geom_line(aes(y = prob), color = color_pals[4]) +
geom_smooth(method = "glm", method.args = list(family = binomial), se = F,
group = 1, size = 0.5, alpha = 0.5, color = red) +
# geom_point(alpha = 0.7, position = jit_pos) +
# geom_curve(aes(intercept = Intercept, slope = Slope_Seq), color = color_pals[1], size = .75) +
tidybayes::theme_tidybayes() +
scale_y_continuous(labels = scales::percent, limits = c(0, 1)) +
# scale_x_continuous(labels = c("0-20", "21-40", "41-60", "61-80"))+#limits = c(0, 4)) +
labs(title = "Random Intercept and Slopes",
subtitle = "Elbow Dataset - Nonlinear Learning",
y = "Probability of Correct Response") -> p3
# Figure 5, 800*370
cowplot::plot_grid(p1, p2, p3, nrow = 1)
Elbow
test_info_elbows <- read.csv('data/output/1PL-elbows-ICC.csv')
ggplot(test_info_elbows, aes(x = theta, y = prob, group = reorder(item, diff, mean))) +
geom_line(color = color_pals[2], alpha = 0.7) +
scale_y_continuous(labels = scales::percent) +
scale_x_continuous(breaks = -7:7, limits = c(-7, 7)) +
labs(x = "Person ability", y = "Probability of correct response", colour = "Item",
title = "Joint Item Characteristic Plot", subtitle = "Elbow Dataset") +
geom_segment(aes(y = 0.5, yend = 0.5, x = -Inf, xend = 4.156322), linetype = "dashed", alpha = 0.5, size = 0.2) +
geom_segment(aes(y = -Inf, yend = 0.5, x = 4.156322, xend = 4.156322), linetype = "dashed", alpha = 0.5, size = 0.2) +
tidybayes::theme_tidybayes()
ECG
test_info_ecg <- read.csv('data/output/1PL-ecg-ICC.csv')
ggplot(test_info_ecg, aes(x = theta, y = prob, group = reorder(item, diff, mean))) + geom_line(color = blue, alpha = 0.7) +
scale_y_continuous(labels = scales::percent) +
scale_x_continuous(breaks = -6:6, limits = c(-6, 6)) +
labs(x = "Person ability", y = "Probability of correct response", colour = "Item",
title = "Joint Item Characteristic Plot", subtitle = "ECG Dataset")+
tidybayes::theme_tidybayes()
lltm_elbows <- readRDS('data/output/LLTM-elbows.rds')
# lltm_elbows$CaseDx <- factor(lltm_elbows$CaseDx, levels = lltm_data$CaseDx)
lltm_elbows %>%
ggplot(aes(x = CaseDx, y = FE_dx + RE_item, fill = CaseDx)) +
geom_boxplot(alpha = 0.75, outlier.alpha = 0.5, outlier.size = 1) +
geom_point(size = 2, alpha = 0.7, show.legend = F) +
# geom_point(aes(y = FE_dx + logit)) +
geom_point(aes(y = FE_dx), shape = 18, size = 6, show.legend = F) +
theme_tidybayes() +
theme(axis.ticks.x = element_blank(), legend.text = element_text(size = 14),
axis.text.x = element_blank(),
axis.text = element_text(size = 15),
axis.title = element_text(size = 17), title = element_text(size = 18)) +
scale_fill_brewer(palette = "Paired") +
scale_y_continuous(limits = c(-4,4), breaks = seq(-4,4,1)) +
labs(y = "Log of Odds of Accurate Response", x = "",
title = "Distributions of Difficulty by Diagnosis",
subtitle = "Elbow Dataset",
fill = "Diagnosis")
lltm_ecg <- readRDS('data/output/LLTM-ecg.rds')
lltm_ecg %>%
ggplot(aes(x = cardiologistDxLabel, y = FE_dx + RE_item, fill = cardiologistDxLabel)) +
geom_boxplot(alpha = 0.75, outlier.alpha = 0.5, outlier.size = 1) +
geom_point(size = 2, alpha = 0.7, show.legend = F) +
# geom_point(aes(y = FE_dx + logit)) +
geom_point(aes(y = FE_dx), shape = 18, size = 6, show.legend = F) +
theme_tidybayes() +
theme(axis.ticks.x = element_blank(),
legend.text = element_text(size = 13),
axis.text.x = element_blank(),
axis.text = element_text(size = 15),
axis.title = element_text(size = 17),
title = element_text(size = 18)) +
scale_fill_brewer(palette = "Paired") +
scale_y_continuous(limits = c(-4, 4), breaks = seq(-4,4,1)) +
labs(y = "Log of Odds of Accurate Response", x = "",
title = "Distributions of Difficulty by Diagnosis",
subtitle = "ECG Dataset",
fill = "Diagnosis")
elbows <- readRDS("data/interim/train_elbows.rds")
m2 <- readRDS('models/mlm_logistic_Dx.rds')
rater1 <- elbows[elbows$RaterID == 34,]
rater1$sclSeq <- rater1$Sequence/20
rater_plot <- cbind(rater1, merTools::predictInterval(m2, newdata = rater1, level = 0.95,
n.sims=5000, type = 'probability',
include.resid.var = F))
ggplot(data = rater_plot, aes(x = Sequence, y = fit, ymin = lwr, ymax = upr, color = CaseType2)) +
geom_pointrange(alpha = 0.7) +
scale_y_continuous(labels = scales::percent_format(accuracy = 1L)) +
labs(y = "Probability of Correct Response", color = "Diagnosis",
x = "Sequence",
subtitle = paste0("Elbow Dataset - Learner ", 34)) + scale_color_brewer(palette = "Set2") +
theme_classic() +
theme(legend.position = "bottom", legend.title = element_blank(),
legend.text = element_text(size = 14),
axis.text = element_text(size = 14),
axis.title = element_text(size = 15), title = element_text(size = 16))
Test statistic: Mean and Standard Deviation of the Total Number of correct responses across all learners
Mean Simulations and Comparison
Standard Deviation Simulations and Comparison
sessionInfo()
## R version 4.0.0 (2020-04-24)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.7
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] tidybayes_2.0.3 lme4_1.1-23 Matrix_1.2-18 forcats_0.5.0
## [5] stringr_1.4.0 dplyr_1.0.0 purrr_0.3.4 readr_1.3.1
## [9] tidyr_1.1.0 tibble_3.0.1 ggplot2_3.3.2 tidyverse_1.3.0
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-147 fs_1.4.1 lubridate_1.7.8 RColorBrewer_1.1-2
## [5] httr_1.4.1 tools_4.0.0 backports_1.1.8 R6_2.4.1
## [9] DBI_1.1.0 mgcv_1.8-31 colorspace_1.4-1 withr_2.2.0
## [13] tidyselect_1.1.0 compiler_4.0.0 cli_2.0.2 rvest_0.3.5
## [17] arrayhelpers_1.1-0 xml2_1.3.2 labeling_0.3 scales_1.1.1
## [21] mvtnorm_1.1-0 blme_1.0-4 digest_0.6.25 minqa_1.2.4
## [25] rmarkdown_2.1 pkgconfig_2.0.3 htmltools_0.4.0 fastmap_1.0.1
## [29] dbplyr_1.4.3 rlang_0.4.6 readxl_1.3.1 rstudioapi_0.11
## [33] shiny_1.4.0.2 farver_2.0.3 generics_0.0.2 svUnit_1.0.3
## [37] jsonlite_1.6.1 magrittr_1.5 Rcpp_1.0.4.6 munsell_0.5.0
## [41] fansi_0.4.1 abind_1.4-5 lifecycle_0.2.0 stringi_1.4.6
## [45] yaml_2.2.1 merTools_0.5.0 MASS_7.3-51.5 plyr_1.8.6
## [49] grid_4.0.0 promises_1.1.0 crayon_1.3.4 lattice_0.20-41
## [53] haven_2.3.1 cowplot_1.0.0 splines_4.0.0 hms_0.5.3
## [57] knitr_1.28 pillar_1.4.4 boot_1.3-24 codetools_0.2-16
## [61] reprex_0.3.0 glue_1.4.1 evaluate_0.14 modelr_0.1.6
## [65] httpuv_1.5.2 vctrs_0.3.1 nloptr_1.2.2.1 foreach_1.5.0
## [69] cellranger_1.1.0 gtable_0.3.0 assertthat_0.2.1 xfun_0.13
## [73] mime_0.9 xtable_1.8-4 broom_0.5.6 later_1.0.0
## [77] coda_0.19-3 arm_1.11-1 iterators_1.0.12 statmod_1.4.34
## [81] ellipsis_0.3.1